# Reading in the data; empty strings and the literal "NA" are read as missing.
initial_data <- read.csv("Data_Econ_index.csv", na.strings = c("", "NA"))

# Removing top two rows
initial_data <- initial_data[-c(1, 2), ]

# Append ", Armenia" to every "Yerevan" that is not already followed by it.
# The negative lookahead replaces the earlier two-step approach of appending
# the suffix everywhere and then collapsing the doubled ", Armenia, Armenia".
initial_data$primary.location <- str_replace_all(
  initial_data$primary.location,
  "Yerevan(?!, Armenia)",
  "Yerevan, Armenia"
)

# Spell out "N/A" placeholders as "Unknown".
initial_data$funding <- str_replace_all(initial_data$funding, "N/A", "Unknown")

# In the founders column "N/A" may carry one leading space; " ?N/A" consumes it
# when present, which matches the previous " N/A"-then-"N/A" pair of passes.
initial_data$X..of.founders <- str_replace_all(
  initial_data$X..of.founders,
  " ?N/A",
  "Unknown"
)

# Normalise the literal entry "4(?)" to "4".
initial_data$X..of.founders <- str_replace_all(
  initial_data$X..of.founders,
  "4\\(\\?\\)",
  "4"
)



# Changing the missing data to unknown: each listed column becomes a factor in
# which NA is an explicit "Unknown" level. The columns are taken from the piped
# data (not from `initial_data$...`), so the step stays correct if rows are
# filtered or reordered earlier in the pipeline.
data_unknowns <- initial_data %>%
  mutate(across(
    c(
      primary.location,
      accelerator.incubator,
      current.stage,
      funding,
      date.published,
      X..of.founders
    ),
    ~ fct_explicit_na(.x, na_level = "Unknown")
  )) %>%
  # This column is not used in the rest of the analysis shown here.
  select(-date.range.for.government.support)

#' Extract the first four-digit number from each string as a year.
#'
#' @param string Character vector to search.
#' @return Numeric vector with one element per element of `string`; `NA` where
#'   no run of four digits is found.
yearExtract <- function(string) {
  # regexec() reports only the first match per element, so each list entry
  # holds either zero or one string.
  matches <- regmatches(string, regexec("[0-9]{4}", string))

  # vapply() guarantees a numeric vector. sapply() would return an empty list
  # for zero-length input and a logical vector when nothing matches.
  vapply(
    matches,
    function(hit) {
      if (length(hit) > 0) as.numeric(hit) else NA_real_
    },
    numeric(1)
  )
}

# Changing date published data to be only in years format.
# yearExtract() already yields the year as a number, so the value is used
# directly. Parsing it with as.Date(x, format = "%Y") is avoided on purpose:
# that call fills in the current month and day, which makes every non-leap
# year come back as NA when the script is run on 29 February.
yrs <- yearExtract(as.character(data_unknowns$date.published))
data_unknowns$date.published <- yrs

# Changing NAs to be Unknown (assigning a string turns the column into
# character, so years are stored as e.g. "2019").
data_unknowns$date.published[is.na(data_unknowns$date.published)] <- "Unknown"
# Startups per primary location; the trace type is left for plotly to infer.
data_unknowns %>%
  plot_ly(x = ~primary.location)
## No trace type specified:
##   Based on info supplied, a 'histogram' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#histogram
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

# Primary location against current stage.
data_unknowns %>%
  plot_ly(
    y = ~primary.location,
    x = ~current.stage,
    colors = "Accent"
  )
## No trace type specified:
##   Based on info supplied, a 'histogram2d' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#histogram2d

# Startups per current stage, drawn in red with a black outline.
data_unknowns %>%
  plot_ly(
    x = ~current.stage,
    color = I("red"),
    stroke = I("black"),
    span = I(2)
  )
## No trace type specified:
##   Based on info supplied, a 'histogram' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#histogram
library(dplyr)
library(babynames)
library(streamgraph)

# Streamgraph of the yearly totals for baby names that start with "Kr".
filter(babynames, grepl("^Kr", name)) %>%
  group_by(year, name) %>%
  tally(wt = n) %>%
  streamgraph(key = "name", value = "n", date = "year")
library(dplyr)
library(streamgraph)
library(pbapply)


# Attach to every row the number of startups sharing its publication year.
data_unknowns2 <- transform(data_unknowns, count = table(date.published)[date.published])

# Number of startups for each stage/year combination present in the data.
piapa <- data_unknowns2 %>%
  group_by(interaction(current.stage, date.published)) %>%
  summarise(count = n())
## `summarise()` ungrouping output (override with `.groups` argument)

# One row per stage/year combination with a known year, carrying the number of
# startups in that combination. The count gets an explicit column name instead
# of relying on the auto-generated `count.Var1` / `count.Freq.1` names that
# result from nesting a table inside transform() a second time.
walk2 <- data_unknowns2 %>%
  filter(date.published != "Unknown") %>%
  add_count(current.stage, date.published, name = "stage_year_count") %>%
  distinct(current.stage, date.published, .keep_all = TRUE)

# NOTE(review): "red" appears twice in the palette — confirm this is intended.
# NOTE(review): the annotation label looks like placeholder text — confirm the
# intended wording.
walk2 %>%
  streamgraph("current.stage", "stage_year_count", "date.published") %>%
  sg_fill_manual(c("#ffa500", "blue", "purple", "red", "#00ff00", "red")) %>%
  sg_legend(show = TRUE, label = "Phase of Startup") %>%
  sg_axis_x(1, "year", "%Y") %>%
  sg_title(title = "Phases of Startups in Armenia 2016-2020") %>%
  sg_annotate(
    label = "Verkjseflksjflksjflksjflsfjlmont",
    x = .5,
    y = .91,
    color = "black",
    size = 20
  )
## Phases of Startups in Armenia 2016-2020